Set Up

  • Install/load necessary R packages
  • Set working directory if necessary (or create a file path to use throughout the RMD to call the data from Box)
  • Read in water year data from CSVs in the intermediate_data directory

Manual Streamflow Checks (2017 - 2019)

  • Read in the manual streamflow checkpoints from the External-MEF_DATA Box folder
    • The raw data is found at the following file path: External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks
  • Clean and manipulate the data into a tidy format
    • Renaming columns to use lower_snake_case and avoid special characters via clean_names()
    • Convert the collected column from a character into a POSIXct date format
    • Remove rows with NA values in the stripchart_stage column
  • Subset the data to include only the data of interest (S2 weir data) and the time period of interest (2017 - 2019)
  • Plot the manual checks data
# Build the path to the manual-checks folder on Box.
# NOTE: this is a user-specific absolute path (Mia's machine); anyone else
# running this file must point `filepath` at their own Box sync location.
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks"

# Read in the manual checks data.
# `filepath` is already absolute, so it is joined with file.path() rather than
# here(): here() is meant for path components relative to the project root.
mc <- read_csv(file.path(filepath, "2017-2021_S2Stage.csv"))

# Clean the manual checks data:
#   1. standardise column names (lower_snake_case) with clean_names()
#   2. keep only the S2 weir rows (drops the S2 lagging pool data)
#   3. parse `collected` into POSIXct and derive `year` / `date` strings
#   4. keep 2017 - 2019 and drop rows without a stripchart stage reading
mc_clean <- mc %>%
  clean_names() %>%
  subset(name == "S2 WEIR") %>%
  mutate(
    collected = as.POSIXct(collected, format = "%m/%d/%Y %H:%M", tz = "GMT"),
    # `collected` is POSIXct from here on, so it can be formatted directly;
    # there is no need to re-parse it before extracting the year and the date
    year = format(collected, format = "%Y"),
    date = format(collected, format = "%Y-%m-%d")
  ) %>%
  # `year` is a character column: compare it as an integer instead of relying
  # on an implicit string comparison against the numbers 2017 and 2019
  subset(as.integer(year) >= 2017L & as.integer(year) <= 2019L) %>%
  subset(!is.na(stripchart_stage))

# Write the cleaned manual checks to intermediate_data/ so later RMD files can
# read them without repeating the cleaning steps
write.csv(mc_clean,
          file = here("intermediate_data", "mc_clean.csv"),
          row.names = FALSE)

# Quick look at the manual checks through time
ggplot(mc_clean, aes(x = collected, y = stripchart_stage)) +
  geom_point() +
  labs(title = "S2 Weir Manual Streamflow Checks (2017 - 2019)",
       x = "Time",
       y = "Stream Height (ft)") +
  theme_classic() +
  # centre the title
  theme(plot.title = element_text(hjust = 0.5))

Streamflow + Manual Checks (2017 - 2019)

  • Combine all 3 water years using rbind()

  • Plot stripchart data (as lines) then add the manual checks (as points) on top

# Stack the three water years (2017 - 2019) into a single data frame
all_streamflow <- rbind(
  wy2017_clean,
  wy2018_altered,
  wy2019_clean
)

# Write the combined data to intermediate_data/ for use in later RMD files
write.csv(all_streamflow,
          file = here("intermediate_data", "all_streamflow.csv"),
          row.names = FALSE)

# Stripchart record drawn as a thin line, with the manual checks overlaid as
# red points
p_all <- ggplot(data = all_streamflow) +
  geom_line(aes(x = datetime, y = stream_height_ft), size = 0.25) +
  geom_point(aes(x = collected, y = stripchart_stage),
             data = mc_clean,
             size = 0.5,
             color = "red") +
  labs(title = "WY 2017 - 2019 S2 Bog Stream Height",
       subtitle = "Stripchart data are plotted as black lines. Manual checks are plotted as red dots.",
       x = "Time",
       y = "Stream Height (ft)") +
  theme_classic() +
  # centre the title and the subtitle
  theme(plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(hjust = 0.5))

# Static version (uncomment to print)
#p_all

# Export the figure as a 6 x 3 inch PNG at 300 dpi
ggsave(filename = "streamflow_with_manual_checks.png",
       plot = p_all,
       path = "figures/",
       width = 6,
       height = 3,
       units = "in",
       dpi = 300)

# Interactive version
ggplotly(p_all)

1:1 Stripchart vs Manual Checkpoints (2017 - 2019)

Next, we are interested in a 1:1 comparison of stripchart data vs manual checkpoints data at the exact same timestamp. Since the stripchart data and manual checkpoints do not align perfectly, we will interpolate the stripchart stream flow values using the zoo package to estimate stripchart values at the time of the manual checks.

# Prepare the manual checks for joining with the stripchart data
mc_sub <- mc_clean %>%
  # calendar date (timestamp dropped), stored as POSIXct at midnight GMT
  mutate(date = as.POSIXct(format(collected, format = "%Y-%m-%d"),
                           tz = "GMT")) %>%
  # use the same timestamp column name as the stripchart data
  rename(datetime = collected) %>%
  # drop columns that are not needed for the comparison
  select(-c(site, lab_id, name, point_gage, logger_stage))

# Date range covered by the manual checkpoint data (2017-04-04 to 2019-12-31)
max_date <- max(mc_sub$date)
min_date <- min(mc_sub$date)

# Keep only the stripchart rows that fall inside the manual-check date range
streamflow_sub <- all_streamflow %>%
  subset(date >= min_date & date <= max_date)

# Join the stripchart data and the manual checks data, then estimate the
# stripchart value at every manual-check timestamp.
#
# Result columns of interest:
#   manual_check - stream height recorded by the manual check (NA on rows that
#                  only exist in the stripchart data)
#   stripchart   - stripchart stream height, linearly interpolated in time
#                  wherever the stripchart has no reading at that timestamp
#
# The interpolation has to run on the stripchart column alone, against the
# timestamps, on time-ordered rows. Interpolating a pivoted "long" column would
# interleave manual-check and stripchart values and fill each gap from its row
# neighbours, i.e. from the manual checks themselves instead of from the
# stripchart record.
# NOTE(review): assumes `datetime` in both tables is expressed in the same time
# zone - confirm against the water-year cleaning steps.
streamflow_mc_fj <- full_join(x = mc_sub,
                              y = streamflow_sub,
                              by = c("datetime", "date")) %>%
  rename(manual_check = stripchart_stage,
         stripchart = stream_height_ft) %>%
  # full_join() appends the unmatched stripchart rows after the manual-check
  # rows, so put everything back into chronological order first
  arrange(datetime) %>%
  # interpolate against time (x = datetime), not row position; na.rm = FALSE
  # keeps leading/trailing NAs so the column length still matches the table
  mutate(stripchart = na.approx(stripchart,
                                x = datetime,
                                method = "linear",
                                na.rm = FALSE)) %>%
  # keep the two value columns as the last two columns of the table
  select(-manual_check, -stripchart, manual_check, stripchart)

# Left join back onto the manual checks to keep only the timestamps of interest
streamflow_mc_lj <- left_join(x = mc_sub,
                              y = streamflow_mc_fj,
                              by = c("datetime", "year", "date")) %>%
  mutate(manual_check = as.numeric(manual_check),
         stripchart = as.numeric(stripchart))

# Save clean data CSV to use in future RMD files
write.csv(x = streamflow_mc_lj,
          file = here("intermediate_data", "streamflow_mc_lj.csv"),
          row.names = FALSE)

# 1:1 scatter of manual checks against the interpolated stripchart values,
# with the line y = x drawn on top for reference
p_1to1 <- ggplot() +
  geom_point(aes(x = manual_check, y = stripchart),
             data = streamflow_mc_lj,
             alpha = 0.75) +
  geom_abline(intercept = 0, slope = 1) +
  # same 0 - 0.4 range on both axes
  lims(x = c(0, 0.4), y = c(0, 0.4)) +
  labs(title = "S2 Manual Checks vs Interpolated Stripchart Values (2017 - 2019)",
       subtitle = "The line y = x is plotted for reference.",
       x = "Manual Checkpoints",
       y = "Stripchart Data") +
  theme_classic() +
  # centre the title and the subtitle
  theme(plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(hjust = 0.5))

# Static version (uncomment to print)
#p_1to1

# Export the figure as a 6 x 5 inch PNG at 300 dpi
ggsave(filename = "streamflow_mc_comparison.png",
       plot = p_1to1,
       path = "figures/",
       width = 6,
       height = 5,
       units = "in",
       dpi = 300)

# Interactive version
ggplotly(p_1to1)
# Difference at each manual-check timestamp:
# interpolated stripchart value minus manual check value
streamflow_mc_diff <- mutate(streamflow_mc_lj,
                             diff = stripchart - manual_check)

# Write the differences to intermediate_data/ for use in later RMD files
write.csv(streamflow_mc_diff,
          file = here("intermediate_data", "streamflow_mc_diff.csv"),
          row.names = FALSE)

# Time series of the differences (stripchart - manual checks), drawn as small
# points connected by a line
p_diff <- ggplot(streamflow_mc_diff, aes(x = datetime, y = diff)) +
  geom_point(size = 0.5) +
  geom_line() +
  labs(title = "Difference Between Stripcharts and Manual Checks",
       subtitle = "Differences were calculated as stripchart value - manual checks value.",
       x = "Time",
       y = "Difference in Stream Height (ft)") +
  theme_classic() +
  # centre the title and the subtitle
  theme(plot.subtitle = element_text(hjust = 0.5),
        plot.title = element_text(hjust = 0.5))

# Static version
p_diff

# Export the figure as a 6 x 3 inch PNG at 300 dpi
ggsave(filename = "streamflow_mc_diff.png",
       plot = p_diff,
       path = "figures/",
       width = 6,
       height = 3,
       units = "in",
       dpi = 300)

Old Shaft Encoder Data (2017 - 2019)

  • Read in the shaft encoder data from the External-MEF_DATA Box folder

    • Specifically, raw data is found at the following file path: External-MEF_DATA/Hydro/Streamflow/L1_subdaily
  • Clean and manipulate the data into a tidy format

    • Renaming columns to use lower_snake_case and avoid special characters via clean_names()
    • Remove extraneous columns
    • Rename columns to join the data later
  • Plot the old shaft encoder data

# Build the path to the L1 sub-daily streamflow folder on Box.
# NOTE: this is a user-specific absolute path (Mia's machine); anyone else
# running this file must point `filepath` at their own Box sync location.
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L1_subdaily/"

# Read in the 2017 - 2019 S2 5-minute file.
# `filepath` is already absolute, so it is joined with file.path() rather than
# here(): here() is meant for path components relative to the project root.
se_data <- read_csv(file.path(filepath, "S2_5min_2017-2019.csv"))

# Tidy the shaft encoder data: standardise the column names, drop the columns
# that are not needed here, and rename the timestamp column so it matches the
# other data sets for later joins
se_clean <- se_data %>%
  clean_names() %>%
  select(-c(x1, record, cond, ct, streamflow_cfs, streamflow_lps, temp_c)) %>%
  rename(datetime = timestamp)

# Write the cleaned data to intermediate_data/ for use in later RMD files
write.csv(se_clean,
          file = here("intermediate_data", "se_clean.csv"),
          row.names = FALSE)

# Line plot of the shaft encoder stage through time
ggplot(se_clean, aes(x = datetime, y = stage)) +
  geom_line() +
  labs(title = "Shaft Encoder S2 Bog (2017 - 2019)",
       x = "Time",
       y = "Stream Height (ft)") +
  theme_classic() +
  # centre the title
  theme(plot.title = element_text(hjust = 0.5))